package it.uniroma2.svd.writer;

import it.uniroma2.svd.DataInterface;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.Vector;

import it.uniroma2.svd.writer.BinaryMatrix;
import it.uniroma2.svd.writer.FileTools;
import it.uniroma2.svd.writer.ReadFileLSA;



/**
 * Text-format implementation of {@link FileTools}: reads and writes LSA vectors
 * as lines of the form
 * <pre>
 * name &lt;TAB&gt; weight &lt;TAB&gt; 0 &lt;TAB&gt; v0,v1,...,vN
 * </pre>
 * Lines can be written either one at a time through a handle opened with
 * {@link #initHandle(String)} (see {@link #writeTokenLine}) or all at once
 * with {@link #writeToken}.
 */
public class FileToolsTxt  implements FileTools {

	/**
	 * Writer used by {@link #writeTokenLine}; opened by {@link #initHandle(String)}
	 * and released by {@link #closeHandle()}. {@code null} while no handle is open.
	 */
	PrintWriter output_ = null;

	/**
	 * Opens the output channel used by the line-by-line text writing methods.
	 * If a handle is already open it is closed first, so repeated calls do not
	 * leak file descriptors.
	 *
	 * @param PathOutput path of the file to create (an existing file is overwritten)
	 * @throws Exception if the file cannot be opened for writing
	 */
	public void initHandle  (String PathOutput) throws  Exception {
		if (output_ != null) {
			output_.close();
			output_ = null;
		}
		// Autoflush is kept on so every println() reaches the file immediately.
		output_ = new PrintWriter(new FileWriter(PathOutput), true);
	}

	/**
	 * Flushes and closes the handle opened by {@link #initHandle(String)}.
	 * Calling it when no handle is open is a no-op.
	 *
	 * @throws Exception if an I/O error occurred on the writer; {@link PrintWriter}
	 *                   never throws on write, so the error is surfaced here
	 */
	public void closeHandle () throws  Exception {
		if (output_ == null) {
			return;
		}
		PrintWriter closing = output_;
		output_ = null;	// a later writeTokenLine() must fail loudly, not write to a closed stream
		try {
			// checkError() flushes the stream before reporting its error state.
			if (closing.checkError()) {
				throw new IOException("I/O error while writing the output file");
			}
		} finally {
			closing.close();
		}
	}

	/**
	 * Loads LSA data from a text file and structures it into per-row fields.
	 * The file is parsed by {@link ReadFileLSA}, constructed with {@code "\t"}
	 * as its second argument (presumably the field separator; confirm against
	 * {@code ReadFileLSA}).
	 *
	 * @param PathInput path of the file to read
	 * @return a two-element array {@code {data, dataHash}} where
	 *         {@code data} is an {@code Object[rows][4]} holding, per row,
	 *         the feature name ({@code String}), its TF value, the constant
	 *         {@code 0.0f} and the parsed vector ({@code float[]}), and
	 *         {@code dataHash} is a {@code Hashtable<String, Integer>} mapping
	 *         each trimmed feature name to its row index
	 * @throws Exception if the file cannot be read or a vector component is
	 *                   not a valid float
	 */
	public  Object[]  loadToken (String PathInput)  throws  Exception {
		System.out.println("Loading "+PathInput);
		ReadFileLSA read = new ReadFileLSA(PathInput, "\t");

		Vector<String> Info = read.getInfo();
		Vector<Float> TF = read.getTF();
		Vector<Object> Data = read.getData();

		int row = Info.size();
		Hashtable<String, Integer> dataHash = new Hashtable<String, Integer>();
		Object[][] data = new Object[row][4];

		for (int i = 0; i < row; i++) {
			String feature = Info.elementAt(i);
			data[i][0] = feature;
			data[i][1] = TF.elementAt(i);
			data[i][2] = Float.valueOf(0.0F);

			// The vector is stored as a comma-separated list of floats.
			StringTokenizer tokens = new StringTokenizer((String) Data.elementAt(i), ",");
			float[] vector = new float[tokens.countTokens()];
			boolean zero = true;
			for (int j = 0; tokens.hasMoreTokens(); j++) {
				vector[j] = Float.parseFloat(tokens.nextToken());
				if (vector[j] != 0F) {
					zero = false;
				}
			}
			if (zero) {
				System.out.println("***Loaded zero row at "+i);
			}
			data[i][3] = vector;

			// This is the table used to look words up in the matrices.
			dataHash.put(feature.trim(), Integer.valueOf(i));
			if (i % 1000 == 0) {
				System.out.print(".");
			}
		}

		Object[] result = {data, dataHash};	// data array, key hashtable
		System.out.println(" ... Loaded");
		return result;
	}

	/**
	 * Computes the average weight of a word over the entries of its sparse row,
	 * i.e. the sum of the stored weights divided by the number of stored entries.
	 * No caller is visible in this file; the method is kept for existing users.
	 *
	 * @param weight   descriptor of the weight matrix
	 * @param indexRow index of the row (word)
	 * @return the mean of the row's stored weights, or {@code 0.0f} if the row is empty
	 * @throws Exception if the row cannot be read
	 */
	protected static Float WeigthW (BinaryMatrix<Float>  weight,	int indexRow)  throws  Exception {
		// Given indexRow, extracts the map <indexDoc, weight of the word in that doc>.
		TreeMap<Integer,Float> rowWeights = weight.getSparseRow(indexRow);
		if (rowWeights.isEmpty()) {
			return 0.0F;
		}
		float sum = 0.0F;
		for (Float w : rowWeights.values()) {
			sum += w.floatValue();
		}
		return sum / rowWeights.size();
	}

	/**
	 * Writes a single LSA vector as one text line to the handle opened with
	 * {@link #initHandle(String)}, and prints a progress dot every 1000 indexes.
	 *
	 * @param matrixData the vector for this row
	 * @param index      the row index (document or word index)
	 * @param data       source of document/word names and word weights
	 * @param type       DOC or WORD
	 * @throws IllegalStateException if {@link #initHandle(String)} has not been called
	 * @throws Exception             if the name or weight lookup fails
	 */
	public void writeTokenLine (float[] matrixData,
			int index,
			DataInterface data,
			FileTools.type type) throws Exception {

		if (output_ == null) {
			throw new IllegalStateException("initHandle must be called before writeTokenLine");
		}

		output_.println(formatLine(matrixData, matrixData.length, index, data, type));

		if (index % 1000 == 0) {
			System.out.print(".");
		}
	}

	/**
	 * Writes a whole matrix of LSA vectors to a text file, one line per row.
	 * The file is always closed, even when a lookup or write fails part-way.
	 * An empty matrix produces an empty file.
	 *
	 * @param matrixData the data matrix, one row per document or word; every row
	 *                   is written with the width of the first row
	 * @param pathOut    path of the file to write (overwritten if it exists)
	 * @param data       source of document/word names and word weights
	 * @param type       DOC or WORD
	 * @throws Exception if the file cannot be opened, a lookup fails, or an
	 *                   I/O error occurs while writing
	 */
	public void writeToken(float[][] matrixData, String pathOut,  DataInterface data,  FileTools.type type) throws Exception {
		int row = matrixData.length;
		int column = (row == 0) ? 0 : matrixData[0].length;

		// No autoflush: the file is flushed once at the end instead of once per line.
		PrintWriter out = new PrintWriter(new FileWriter(pathOut));
		try {
			System.out.println("path Out writing in progress (One dot every 1000 lines): "+pathOut);

			for (int index = 0; index < row; index++) {
				out.println(formatLine(matrixData[index], column, index, data, type));

				if (index % 1000 == 0) {
					System.out.print(".");
				}
			}

			// PrintWriter swallows IOExceptions; checkError() flushes and reports them.
			if (out.checkError()) {
				throw new IOException("I/O error while writing " + pathOut);
			}
		} finally {
			out.close();
		}
	}

	/**
	 * Builds one output line: {@code name \t weight \t 0 \t v0,v1,...}.
	 * Example: {@code feature_name	TF	0	-0.280741,...}
	 *
	 * @param vector  the values to serialise
	 * @param columns how many leading values of {@code vector} to write
	 * @param index   row index used for the name and weight lookups
	 * @param data    source of names and word weights
	 * @param type    DOC or WORD; any other value yields an empty name
	 * @return the formatted line, without a line terminator
	 * @throws Exception if a lookup on {@code data} fails
	 */
	private static String formatLine (float[] vector, int columns, int index,
			DataInterface data, FileTools.type type) throws Exception {

		String name = "";
		switch (type) {
		case DOC:
			name = data.getDoc(index);
			break;
		case WORD:
			name = data.getWord(index);
			break;
		default:
			break;
		}

		// Documents always get weight 0; words use their stored weight when one exists.
		float weight = 0.0F;
		if (type != FileTools.type.DOC) {
			// The declared type of getWordWeight is not visible from this file, so
			// both numeric and textual values are accepted; the value is read once.
			Object raw = data.getWordWeight(index);
			if (raw instanceof Number) {
				weight = ((Number) raw).floatValue();
			} else if (raw != null) {
				weight = Float.parseFloat(raw.toString());
			}
		}

		StringBuilder line = new StringBuilder();
		line.append(name).append('\t').append(weight).append('\t').append(0).append('\t');
		for (int n = 0; n < columns; n++) {
			if (n > 0) {
				line.append(',');
			}
			line.append(vector[n]);
		}
		return line.toString();
	}

}
